######    code for Range Adjusted Measure DEA   #######
###     O'Hara and Sirianni (2014)

##   Compile (run) functons at the bottom first
##   Then run this top part down to where it says "run to here before using functions"

##  Calls to the functions are of the form:
##  op.eff(ref.mat, eval.mat, show = TRUE/FALSE, ... )
## where ref.mat is the matrix of schools that form the frontier, eval.mat is the matrix of schools to be 
##     evaluated. I separated them so they do not have to be the same. If no separate eval.mat is entered, it assumes that it is the same as the reference matrix. 
## show tells R whether to show the efficiency scores after running it (default is TRUE, which means if you do not put a value in for it, it assumes it to be TRUE)
## when running env.eff, there is an additional argument optim which tells R whether to first find operational efficiency (optim = TRUE) or to evaluate env.eff at the current status quo levels (optim = FALSE)

rm(list=ls())   ##  this just clears the memory to avoid confusion of things already in memory 

###   Set the root directory of the project here, eh?
setwd("/Users/michael/Desktop/Working")

###########################
##   specify these please

m <- 2         ## number of inputs
s <- 2         ## number of good outputs
h <- 1         ## number of bad outputs
nd <- 2        ## number of nondiscretionary variables  

year <- 2011

##  Thank you... 


echo=FALSE;


library(lpSolve)
#library(Hmisc)


###   input final dataset
dat.mat <- read.table((paste(getwd(),"/data/final/final_data",year,".csv",sep="")), header = TRUE, sep = ",", stringsAsFactors = FALSE)

##  run this part
############################
##  subset only variables needed and rescale things
##  data table needs to be in order of inputs, good outputs, bad outputs, non-discretionary vars
##  with each column being one input or output


attach(dat.mat)
dat.tab <- data.frame(instnm, CarnClass,    ##  identifiers
                      InstrExp = InstrExp/1000, ResExp = ResExp/1000,     ##  inputs
                      FTE, Totres = TotRes/1000,      ##  good outputs
                      totemiss12,           ##  bad outputs
                      hdd, cdd              ##  non-discretionary vars
                      )       
detach(dat.mat)
dat.mat <- dat.tab


res.schools <- subset(dat.mat, (CarnClass == "R1" | CarnClass == "R2" | CarnClass == "R"))
R1.schools <- subset(dat.mat, (CarnClass == "R1" ))
R2.schools <- subset(dat.mat, (CarnClass == "R2" ))
AS.schools <- subset(dat.mat, (CarnClass == "AS" ))



##########  run to here before using functions
##########  Then, go to bottom of code and compile the functions
##########  (highlight the code and execute it)
########################################
########################################


####  REPLICATION OF RESULTS IN PAPER

###  Code below replicates Table 1
###  the efficiency scores for R1 schools compared to all research schools
### the last line produces  a Latex formatted table

R1table <- eff.table(res.schools, R1.schools)  ##  compares R1 schools to all research schools
latex(format.df(R1table$eff.table,dec = 4), file = "", rowname = NULL, caption = "R1 schools compared to all research institutions", label = "R1")  ## Tabel 1

###  Code below replicates Table 2
###  the efficiency scores for R2 schools compared to all research schools
### the last line produces  a Latex formatted table

R2table <- eff.table(res.schools, R2.schools)  ##  compares R2 schools to all research schools
latex(format.df(R2table$eff.table,dec = 4), file = "", rowname = NULL, caption = "R2 schools compared to all research institutions", label = "R2")  ## Tabel 1

###  Code below replicates Table 3
###  the efficiency scores for Arts and Sciences (AS) schools compared to all research schools
### the last line produces  a Latex formatted table
### NB: Washington & Jefferson college needs the & changed to "and" before the table will work in Latex

AStable <- eff.table(AS.schools)  ##  compares R2 schools to all research schools
latex(format.df(AStable$eff.table,dec = 4), file = "", rowname = NULL, caption = "Liberal Arts colleges", label = "AS")  ## Tabel 3

###  Code below replicates Table 4
###  the optimal slacks for AS schools using th Total Efficiency model
###  This is simply meant for illustration that these slacks can be examined directly using this approach
### the last line produces  a Latex formatted table


te <- tot.eff(AS.schools)
teslax <- te$slax.df
latex(format.df(teslax,dec = 0), file = "", rowname = NULL, 
                colhead = c("InstrExp", "ResExp", "FTE", "ResGrants", "Emissions"),
                caption = "Optimal slacks: Liberal Arts colleges under Total Efficiency", label = "ASslax")  ## Tabel 1
      

###  produce table of summary values for models (Table 5)
###  This produces the numbers
###  For the table in the paper, need to add in some row headings for the schools types
###  I could not figure out how to put that in the code

summary.table <- rbind(R1table$sum.tab, R2table$sum.tab, AStable$sum.tab)
summary.table
latex(format.df(summary.table, dec = 2), file = "", rowname = NULL,caption = "Summary of models", label = "summodels")  ## Tabel 5
#)



###################   Functions
####   highlight from here to compile them
###########################################

##        efficiency tables:
##         This function runs all four models
##         and produces a table of efficiency scores for all of them
##         along with schools names
##         It is meant to easily produce tables for the paper. 
##         Latexing the output (which is a dataframe) makes a nice table. 

eff.table <- function(r.dat, e.dat = r.dat, show = TRUE, summary = FALSE){
oe <- op.eff(r.dat, e.dat, show = FALSE)
te <- tot.eff(r.dat, e.dat, show = FALSE)
ee.opt <- env.eff(r.dat, e.dat, optim = TRUE, show = FALSE)
ee.sq <- env.eff(r.dat, e.dat, optim = FALSE, show = FALSE)

eff.scores <- data.frame( OpEff = oe$eff.scores, 
                          EnvEff = ee.opt$eff.scores,
                          EnvEffStatQuo = ee.sq$eff.scores,
                          TotEff = te$eff.scores)
                          
###   This part computes a summary table which, for each model
###   gives the proportion of schools that are efficient and 
###   the mean efficiency for those that are not efficient                          
eff.prop <- apply(eff.scores,2,function(x) (ifelse(x >.999, 1,0)))
ineff.prop <- 1-eff.prop
n.ineff <- nrow(eff.scores)* (colMeans(ineff.prop))
eff.prop <- colMeans(eff.prop)
ineff.means <- colSums(ineff.prop * eff.scores)/n.ineff

row.names <- c("Proportion efficient","Mean efficiency")
sum.mat <- rbind(eff.prop, ineff.means)
sum.tab <- data.frame(row.names, sum.mat)  


tot.display <- data.frame(Institution = e.dat$instnm, 
                          OpEff = oe$eff.scores, 
                          EnvEff = ee.opt$eff.scores,
                          EnvEffStatQuo = ee.sq$eff.scores,
                          TotEff = te$eff.scores)
                          
                          if(show == TRUE){
                          	print(tot.display); if(summary == TRUE){print(sum.tab)}}
                          	
eff.tab.output <- list(sum.tab = sum.tab, eff.table = tot.display)
      return(eff.tab.output)
      }
      
      
###########################################
##            operational efficiency
##    does not include the nondiscretionary vars 

op.eff <-  function (roe.dat, eoe.dat = roe.dat, show = TRUE) 
{

ref.mat <- as.matrix(roe.dat[,3:ncol(roe.dat)])
eval.mat <- as.matrix(eoe.dat[,3:ncol(eoe.dat)])

n <- nrow(ref.mat)
n.dmu <- nrow(eval.mat)

#X <- ref.mat[,3:(m+2)]
#Y <- ref.mat[,(m+3):(m+2+s)]
#U <- ref.mat[,(2+m+s+1):ncol(dat.mat)]

X <- ref.mat[,1:m]
Y <- ref.mat[,(m+1):(m+s)]
U <- ref.mat[,(1+m+s):(m+s+h)]
ND <- ref.mat[,(1+m+s+h):(m+s+h+nd)]

######   Range variables

y.upper <- apply(Y,2,max)
y.lower <- apply(Y,2,min)
x.upper <- apply(X,2,max)
x.lower <- apply(X,2,min)
u.upper <- max(U) #apply(U,2,max)
u.lower <- min(U) #apply(U,2,min)

R.g <- 1/((m+s)* (y.upper - y.lower))  ## range for goods
R.x <- 1/((m+s)* (x.upper - x.lower))  ## range for inputs
#R.b <- 1/(h* (u.upper - u.lower))      ## range for bads

xynd.mat <- cbind(X,Y,ND)
xyund.mat <- cbind(X,Y, U, ND)

xynd.base <- eval.mat[,-((m+s+1):(m+s+h))] 
    

obj <- c(R.x,R.g, rep(0,n))
f.con1 <- as.matrix(t(xynd.mat))
con2.signs <- c(rep(1,m), rep(-1,s))

nd.rows <- matrix(0,nd,(m+s))  ## add constraint rows for non-discretionary variables                                             
f.con2 <- rbind(diag(con2.signs),nd.rows)

constr <- cbind(f.con2, f.con1)  ## input and output constraints

l.con <- t(c(rep(0,(m+s)), rep(1,n)))   ## convexity, sum of lambdas = 1

l.con2 <- cbind( (matrix(0,n,m+s)), diag(1,n))  ##  constrain each lambda to be > 0
constr <- rbind(constr,l.con, l.con2)

#constr <- rbind(constr,l.con)

s.con <- cbind(diag(1,(m+s)), matrix(0,(m+s),n))     ##  nonnegativity constraints on slacks
constr <- rbind(constr,s.con)    ##  full matrix of constraints

##  vector of signs for constraints
f.dir <- c(rep("==", (m+s+nd+1)), rep(">=", (m+s+n)))

      results <- matrix(0,n.dmu,(m+s+n))
      for (i in 1:n.dmu){
                     rhs <-  c(t(xynd.base[i,]),1,rep(0,(m+s+n)))
                     sol <- lp("max", obj, constr, f.dir, rhs)$solution; #print(sol)
                     results[i,] <- sol
                     }
opt.slax <- results[,1:(m+s)]
                     
eff.scores <- 1- opt.slax %*% obj[1:(m+s)]
if(show == TRUE){
	print("efficiency scores")
	oe.df <- data.frame(institution = eoe.dat$instnm, op.eff = eff.scores)
     print(oe.df) 
    } 

op.eff.output <- list(slax.df = data.frame(instnm = eoe.dat$instnm, results[,1:(m+s)]),opt.slax = results[,1:(m+s)], lambdas = results[,(m+s+1):ncol(results)], eff.scores = eff.scores)
    return(op.eff.output)
}

###########################################
####   environmental efficiency

env.eff <- function (ree.dat, eee.dat = ree.dat, optim = FALSE, show = TRUE) 
{

ref.mat <- as.matrix(ree.dat[,3:ncol(ree.dat)])
eval.mat <- as.matrix(eee.dat[,3:ncol(eee.dat)])


n <- nrow(ref.mat)
n.dmu <- nrow(eval.mat)

#X <- ref.mat[,4:(m+3)]
#Y <- ref.mat[,(m+4):(m+3+s)]
#U <- ref.mat[,(3+m+s+1):ncol(dat.mat)]

X <- ref.mat[,1:m]
Y <- ref.mat[,(m+1):(m+s)]
U <- ref.mat[,(1+m+s):(m+s+h)]
ND <- ref.mat[,(1+m+s+h):(m+s+h+nd)]

######   Range variables

y.upper <- apply(Y,2,max)
y.lower <- apply(Y,2,min)
x.upper <- apply(X,2,max)
x.lower <- apply(X,2,min)
u.upper <- max(U) #apply(U,2,max)
u.lower <- min(U) #apply(U,2,min)

R.g <- 1/((m+s+h)* (y.upper - y.lower))  ## range for goods
R.x <- 1/((m+s+h)* (x.upper - x.lower))  ## range for inputs
if (optim == TRUE) {R.b <- 1/((m+s+h)* (u.upper - u.lower))} else {R.b <- 1/((h)* (u.upper - u.lower))}      ## range for bads

xyund.mat <- cbind(X,Y, U, ND)

xyund.base <- eval.mat



obj <- c(R.x, R.g, R.b, rep(0,n))   ## Has all inputs and outputs in it, but x and g are 
                                   ## multiplied by zero from con2.signs below
f.con1 <- as.matrix(t(xyund.mat))
con2.signs <- c(rep(0,m), rep(0,s), rep(1,h))  ## difference between this and total eff
                                               ## is the zeroes here
nd.rows <- matrix(0,nd,(m+s+h))  ## add constraint rows for non-discretionary variables                                             
f.con2 <- rbind(diag(con2.signs),nd.rows)

constr <- cbind(f.con2, f.con1)  ## input and output constraints

l.con <- t(c(rep(0,m+s+h), rep(1,n)))   ## convexity, sum of lambdas = 1
l.con2 <- cbind( (matrix(0,n,m+s+h)), diag(1,n))  ##  constrain each lambda to be > 0
constr <- rbind(constr,l.con, l.con2)

s.con <- cbind(diag(1,(m+s+h)), matrix(0,(m+s+h),n))     ##  nonnegativity constraints on slacks
constr <- rbind(constr,s.con)

f.dir <- c(rep("==", (m+s+h+nd+1)), rep(">=", (n)), rep("==",(m+s)), rep(">=", h))

##  set inputs and outputs at optimal levels
##  for rhs matrix
if (optim == TRUE) {  ## not sure if this part works yet
oe <- op.eff(ree.dat, eee.dat, show = FALSE)
opt.slax <- oe$opt.slax
opt.slax <- cbind(opt.slax, matrix(0,n.dmu,h + nd))
rhs.signs <- c(rep(-1,m), rep(1,s), rep(0,h), rep(0,nd))
rhs.sign.mat <- matrix(rhs.signs, n.dmu,(m+s+h+nd), byrow = TRUE)
rhs.mat <- xyund.base + (rhs.sign.mat * opt.slax)
             } else {
	                 (rhs.mat <- xyund.base)}


      results <- matrix(0,n.dmu,(m+s+h+n))
      for (i in 1:n.dmu){
                     rhs <-  c(t(rhs.mat[i,]),1,rep(0,(m+s+h+n)))
                     sol <- lp("max", obj, constr, f.dir, rhs)$solution; #print(sol)
                     results[i,] <- sol
                     }


 
opt.slax <- results[,1:(m+s+h)]

                     
eff.scores <- 1- opt.slax[,((m+s+1):(m+s+h))] * obj[(m+s+1):(m+s+h)]

if(show == TRUE){
	caption <- ifelse((optim == TRUE), "Assuming optimal operational efficiency", "Assuming status quo of outputs and inputs")
print(caption)
ee.df <- data.frame(institution = eee.dat$instnm, env.eff = eff.scores)
print(ee.df) }


env.eff.output <- list(slax.df = data.frame(instnm = eee.dat$instnm, results[,1:(m+s+h)]), opt.slax = results[,1:(m+s+h)], lambdas = results[,(m+s+h+1):ncol(results)], eff.scores = eff.scores)
    return(env.eff.output)
}

###########################################
##            total efficiency 

tot.eff <-  function (rte.dat, ete.dat = rte.dat, show = TRUE) 
{

ref.mat <- as.matrix(rte.dat[,3:ncol(rte.dat)])
eval.mat <- as.matrix(ete.dat[,3:ncol(ete.dat)])

n <- nrow(ref.mat)
n.dmu <- nrow(eval.mat)

#X <- ref.mat[,4:(m+3)]
#Y <- ref.mat[,(m+4):(m+3+s)]
#U <- ref.mat[,(3+m+s+1):ncol(dat.mat)]

X <- ref.mat[,1:m]
Y <- ref.mat[,(m+1):(m+s)]
U <- ref.mat[,(1+m+s):(m+s+h)]
ND <- ref.mat[,(1+m+s+h):(m+s+h+nd)]

 

######   Range variables

y.upper <- apply(Y,2,max)
y.lower <- apply(Y,2,min)
x.upper <- apply(X,2,max)
x.lower <- apply(X,2,min)
u.upper <- max(U) #apply(U,2,max)
u.lower <- min(U) #apply(U,2,min)

R.g <- 1/((m+s+h)* (y.upper - y.lower))  ## range for goods
R.x <- 1/((m+s+h)* (x.upper - x.lower))  ## range for inputs
R.b <- 1/((m+s+h)* (u.upper - u.lower))      ## range for bads

xyund.mat <- cbind(X,Y, U, ND)

xyund.base <- eval.mat
    

obj <- c(R.x,R.g, R.b, rep(0,n))    
f.con1 <- as.matrix(t(xyund.mat))
con2.signs <- c(rep(1,m), rep(-1,s), rep(1,h))

nd.rows <- matrix(0,nd,(m+s+h))  ## add constraint rows for non-discretionary variables                                             
f.con2 <- rbind(diag(con2.signs),nd.rows)

constr <- cbind(f.con2, f.con1)  ## input and output constraints

l.con <- t(c(rep(0,(m+s+h)), rep(1,n)))   ## convexity, sum of lambdas = 1

l.con2 <- cbind( (matrix(0,n,m+s+h)), diag(1,n))  ##  constrain each lambda to be > 0
constr <- rbind(constr,l.con, l.con2)

#constr <- rbind(constr,l.con)

s.con <- cbind(diag(1,(m+s+h)), matrix(0,(m+s+h),n))     ##  nonnegativity constraints on slacks
constr <- rbind(constr,s.con)    ##  full matrix of constraints

##  vector of signs for constraints


f.dir <- c(rep("==", (m+s+h+nd+1)), rep(">=", (n)), rep(">=", (m+s+h)))

      results <- matrix(0,n.dmu,(m+s+h + n))
      for (i in 1:n.dmu){print(dim(constr))
                     rhs <-  c(t(xyund.base[i,]),1,rep(0,(m+s+h+n)))
                     sol <- lp("max", obj, constr, f.dir, rhs)$solution; 
                     results[i,] <- sol
                     }
opt.slax <- results[,1:(m+s+h)]
                     
eff.scores <- 1- opt.slax %*% obj[1:(m+s+h)]  
te.df <- data.frame(institution = ete.dat$instnm, tot.eff = eff.scores)                    

if(show == TRUE){
	print("efficiency scores")
        print(te.df)} 

tot.eff.output <- list(opt.slax <- results[,1:(m+s+h)], slax.df = data.frame(instnm = ete.dat$instnm, results[,1:(m+s+h)]), lambdas = results[,(m+s+1):ncol(results)], eff.scores = eff.scores, te.df = te.df)
    return(tot.eff.output)
}

###   Highlight down to here when compiling functions 
